home *** CD-ROM | disk | FTP | other *** search
- /*
- cookietool is (c) 1995-2000 by Wilhelm Noeker (wnoeker@t-online.de)
-
- This program is free software; you can redistribute it and/or
- modify it under the terms of the GNU General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA
-
- */
-
-
- /*========================================================================*\
- | File: cdbsplit.c Date: 25 Oct 1997 |
- *------------------------------------------------------------------------*
- | Split parts off your cookie database, by keyword, by line length, |
- | by number of lines, or as groups of "similar" cookies. |
- | Expected file format is plain text with a "%%" line ending each cookie.|
- | See help() for usage notes. |
- | |
- \*========================================================================*/
-
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <ctype.h>
- #include "strstuff.h"
-
/* Embedded version tag. */
char version[] = "$VER: cdbsplit 2.3 (17.07.2000)";

/* The line that ends each cookie in the database. */
#define EOC "%%"

/* Sizes and limits. */
#define FBUFSIZE 16384          /* size handed to setvbuf() for each file */
#define CBUFSIZE 32000L         /* room for one complete cookie */
#define LBUFSIZE 2000           /* room for one input line */
#define NTARGET 10              /* max. keywords of each kind (-k / -K) */

/* Work buffers. */
char line[LBUFSIZE];            /* the input line currently being read */
char cbuf[CBUFSIZE];            /* the cookie currently being collected */
char cbak[CBUFSIZE];            /* previous cookie, for "similar" mode */
char uppercase[256];            /* conversion table (not referenced in this file) */

/*
 * Selection ranges set from the command line.  A maximum of 0 means
 * "no upper limit".
 */
int l_min = 1;                  /* -l: min. number of lines in a cookie */
int l_max = 0;                  /* -L: max. number of lines in a cookie */
int w_min = 1;                  /* -w: min. cookie line width */
int w_max = 0;                  /* -W: max. cookie line width */
long n_min = 1;                 /* -n: first cookie number considered */
long n_max = 0;                 /* -N: last cookie number considered */

/* -m: number of matching chars for "similar" mode; 0 = standard mode. */
int matchlen = 0;

/* Keyword lists; the entries point into argv. */
char *rtarget[NTARGET];         /* -K: required keywords */
char *otarget[NTARGET];         /* -k: optional keywords */
int required = 0;               /* used entries in rtarget[] */
int optional = 0;               /* used entries in otarget[] */
-
-
/*
 * Print the usage summary on stdout.  If <s> is not NULL, it is
 * reported first as an illegal option.
 */
void help( char *s )
{
  static const char *const usage[] =
  {
    "usage: cdbsplit [options] <cookiefile> <newfile>\n",
    "where options are:\n",
    " -k<keywd> / -K<keywd> search for optional / required keyword\n",
    " -l<lines> / -L<lines> range for number of lines in a cookie\n",
    " -w<width> / -W<width> range for cookie line width\n",
    " -n<no.> / -N<no.> range of cookie numbers\n",
    " -m<m> extract groups of cookies with <m> matching chars\n",
    " -d[0-3] how fussy about word delimiters? (default: 2) \n",
    " -c case sensitive comparisons\n",
    " -x extract only, do not modify the input file\n",
    " -a append to an existing output file\n"
  };
  size_t i;

  if( s != NULL )
    printf( "illegal option '%s'\n", s );
  /* none of the lines contains a '%', so fputs() prints them verbatim */
  for( i = 0; i < sizeof usage / sizeof usage[0]; i++ )
    fputs( usage[i], stdout );
}
-
-
-
- int dispatch( char *cookie, int good, FILE *fp2, FILE *fp3 )
- {
- long result = 1;
-
- if( good )
- result = fprintf( fp2, "%s%s\n", cookie, EOC );
- else if( fp3 )
- result = fprintf( fp3, "%s%s\n", cookie, EOC );
- if( result <= 0 )
- {
- printf( "\nfile error, aborted !!!\n" );
- exit( 20 );
- }
- return good ? 1 : 0;
- }
-
-
- long filter_cookies( FILE *fp1, FILE *fp2, FILE *fp3 )
- /* scatter contents of <fp1> across <fp2> (hitfile) and <fp3> (dumpfile),
- * return value is the number of cookies in the dumpfile
- */
- {
- long count = 0, hits = 0, cbuflen;
- int ok = 0, ok2, lines, width, w, i;
-
- strcpy( cbak, "" );
- strcpy( cbuf, "" );
- cbuflen = lines = width = 0;
- while( fgets( line, LBUFSIZE, fp1 ) )
- {
- if( strncmp( line, EOC, strlen( EOC ) ) == 0 )
- { /* "end of cookie"-marker */
- count++;
- /* perform the checks: */
- if( matchlen ) /* "similar cookies" mode */
- {
- ok2 = ok;
- ok = (strn_cmp( cbak, cbuf, matchlen ) == 0);
- if( *cbak ) /* skip the first loop */
- hits += dispatch( cbak, ok || ok2, fp2, fp3 );
- strcpy( cbak, cbuf );
- }
- else /* standard mode */
- {
- ok = (lines >= l_min && width >= w_min && count >= n_min);
- if( l_max )
- ok = ok && ( lines <= l_max );
- if( w_max )
- ok = ok && ( width <= w_max );
- if( n_max )
- ok = ok && ( count <= n_max );
- if( ok ) /* string checks still necessary? */
- {
- if( required == 0 && optional > 0 )
- ok = 0;
- for( i = 0; i < required; i++ )
- ok = ok && (str_str( cbuf, rtarget[i] ) != NULL);
- for( i = 0; i < optional; i++ )
- ok = ok || (str_str( cbuf, otarget[i] ) != NULL);
- }
- hits += dispatch( cbuf, ok, fp2, fp3 );
- }
- if( count % 100 == 0 )
- {
- printf( "%ld hits/%ld misses.\r", hits, count - hits );
- fflush( stdout );
- }
- strcpy( cbuf, "" ); /* start a new cookie */
- cbuflen = lines = width = 0;
- }
- else /* add a line to the current cookie */
- {
- w = strlen( line ) - 1; /* don't count the "\n" */
- if( (cbuflen += w) >= CBUFSIZE )
- {
- printf( "\ncookie too big (>%ld chars) \n", CBUFSIZE );
- exit( 20 );
- }
- strcat( cbuf, line );
- lines++;
- if( w > width )
- width = w;
- }
- }
- if( matchlen ) /* one cookie still pending in this mode */
- hits += dispatch( cbak, ok, fp2, fp3 );
- printf( "Done, %ld hits out of %ld\n", hits, count );
- return (count - hits);
- }
-
-
-
- void confirm_options()
- /* tell the user what his options will do */
- {
- int flags, i;
-
- if( matchlen )
- printf( " searching for groups of cookies with "
- "%d matching characters.\n", matchlen );
- else
- {
- flags = 0xf; /* what restrictions do apply? */
- if( optional + required > 0 )
- {
- printf( " search string%s: ", (optional + required > 1) ? "s" : "");
- if( required > 1 && optional > 0 )
- printf("(");
- if( required > 0 )
- {
- printf( "\"%s\"", rtarget[0] );
- for( i = 1; i < required; i++ )
- printf( " && \"%s\"", rtarget[i] );
- }
- if( required > 1 && optional > 0 )
- printf(")");
- if( required > 0 && optional > 0 )
- printf(" || ");
- if( optional > 0 )
- {
- printf( "\"%s\"", otarget[0] );
- for( i = 1; i < optional; i++ )
- printf( " || \"%s\"", otarget[i] );
- }
- printf( "\n" );
- }
- else
- flags ^= 1;
- if( l_max )
- printf( " looking for cookies %d - %d lines long.\n", l_min, l_max );
- else if( l_min > 1 )
- printf( " looking for cookies at least %d lines long.\n", l_min );
- else
- flags ^= 2;
- if( w_max )
- printf( " looking for cookies %d - %d columns wide.\n", w_min, w_max );
- else if( w_min > 1 )
- printf( " looking for cookies at least %d columns wide.\n", w_min );
- else
- flags ^= 4;
- if( n_max )
- printf( " considering cookies #%ld - #%ld only.\n", n_min, n_max);
- else if( n_min > 1 )
- printf( " starting at cookie #%ld.\n", n_min );
- else
- flags ^= 8;
- if( flags == 0 )
- printf( " no restrictions.\n" );
- }
- }
-
-
- int main( int argc, char *argv[] )
- {
- char *s;
- char *name1 = NULL, *name2 = NULL, *name3 = "cdb_temp_kickme";
- int append = 0, move = 1;
- int case_sense = 0, bordermode = 2;
- long dumped;
- FILE *infile, *hitfile, *dumpfile;
-
- if( argc < 3 )
- {
- help( NULL );
- return 5;
- }
- while( --argc )
- {
- s = *++argv;
- if( *s != '-' )
- {
- if( name1 == NULL )
- name1 = s;
- else
- name2 = s;
- }
- else
- {
- s++;
- switch( *s++ )
- {
- case 'k':
- if( optional < NTARGET )
- otarget[ optional++ ] = s;
- break;
- case 'K':
- if( required < NTARGET )
- rtarget[ required++ ] = s;
- break;
- case 'm':
- matchlen = atoi( s );
- break;
- case 'l':
- l_min = atoi( s );
- break;
- case 'L':
- l_max = atoi( s );
- break;
- case 'w':
- w_min = atoi( s );
- break;
- case 'W':
- w_max = atoi( s );
- break;
- case 'n':
- n_min = atol( s );
- break;
- case 'N':
- n_max = atol( s );
- break;
- case 'a':
- append = 1;
- break;
- case 'x':
- move = 0;
- break;
- case 'c':
- case_sense = 1;
- break;
- case 'd':
- if( isdigit( *++s ) )
- bordermode = atoi( s );
- else
- {
- help( argv[0] );
- return 5;
- }
- break;
- default:
- help( argv[0] );
- return 5;
- }
- }
- }
- str_setup( bordermode, case_sense ); /* !!! */
- if( name1 == NULL || name2 == NULL )
- {
- help( NULL );
- return 5;
- }
- if( !(infile = fopen( name1, "r" ) ) )
- {
- printf( "Can't open '%s' for input!\n", name1 );
- return 10;
- }
- setvbuf( infile, NULL, _IOFBF, FBUFSIZE );
- if( !append && (hitfile = fopen( name2, "r" )) )
- {
- printf( "Error: '%s' exists! Use -a to append.\n", name2 );
- return 10;
- }
- if( !(hitfile = fopen( name2, "a" )) )
- {
- printf( "Can't open '%s' for output!\n", name2 );
- return 10;
- }
- setvbuf( hitfile, NULL, _IOFBF, FBUFSIZE );
- if( move )
- {
- if( !(dumpfile = fopen( name3, "w" ) ) )
- {
- printf( "Can't open '%s' for output!\n", name3 );
- return 10;
- }
- setvbuf( dumpfile, NULL, _IOFBF, FBUFSIZE );
- }
- else
- dumpfile = NULL;
- printf( "CdbSplit " );
- print_strstat();
- printf( "%sing from '%s' to '%s',\n", move ? "Mov" : "Copy", name1, name2 );
- confirm_options();
- /* OK, here we go: */
- dumped = filter_cookies( infile, hitfile, dumpfile );
- fclose( infile );
- fclose( hitfile );
- if( move )
- {
- fclose( dumpfile );
- if( remove( name1 ) != 0 || rename( name3, name1 ) != 0 )
- {
- printf( "Couldn't overwrite the input file! Your cookies are in '%s'.\n", name3 );
- return 5;
- }
- if( dumped == 0 )
- {
- remove( name1 );
- printf( "'%s' is now empty and has been deleted.\n", name1 );
- }
- }
- return 0;
- }
-
-